﻿/*  
cd /projects/hsieh_project/proj_201809/code_1_data/
qsas data_1_sales_mgrow.sas 8 &

Author: Adarsh Kumar
Objective: Aggregate Sales data in order to get missing growth
*/

libname hr "/projects/hsieh_project/proj_201809/data/";

%Let dir_out = /projects/hsieh_project/proj_201809/data/;

/*
================================================================================
Load raw data

First read in regular LBD data to get total 1977 employment in each msa1983
*/

%include "/projects/hsieh_project/proj_201809/code_1_data/m_read.sas" /source2;

%m_read(param_dev=1,param_lyear=%bquote(1977), param_czone=0, param_msa1983=1, param_msacz=0, param_drop=1);

data lbd; 
  set lbd;
  if year = 1977;
run;

proc sort data=lbd; by msa1983; run; 

proc means data=lbd noprint; 
  by msa1983; 
  output out=dt_msa83_emp77(drop = _type_ _freq_) sum(worker) = emp_msa83_1977;
run;

data dt_msa83_emp77;
  set dt_msa83_emp77;
  rename msa1983 = msa;
run;

%include "/projects/hsieh_project/proj_201809/code_1_data/m_read_sales.sas" /source2;

%m_read_sales(param_dev=0, param_lyear=%bquote(1977, 1982, 1987, 1992, 1997, 2002, 2007, 2012), param_czone=0, param_msa1983=1, param_msacz=0, param_drop=1);

proc import out=cw_sales datafile="&dir_out./cw_year_sales_v12.dta";
run;

data cw_sales;
  set cw_sales(rename=(ch_ind=ind));
run;
/*
================================================================================
Calculate Concentration
================================================================================
*/

/*Merge emp of msa1983 in 1977 with sales data */
/*
proc sort data=lbd; by msa1983; run; 
proc sort data=dt_msa83_emp77; by msa1983; run;

data lbd;
  merge lbd dt_msa83_emp77; 
  by msa1983;
run;
*/

/*Creating cn and renaming variables to comply with CTH's code
  cn also removes zero sales establishments
*/
data lbd;
  set lbd(drop=msa);
  rename msa1983=msa;
  rename ch_ind=ind;
  if sales > 0;
run;

/* Code below comes from missing growth G Doc */

/* we will create two versions of missing growth;
** version one excludes finance and utilities and transportation and goes from 1977 to 2012;
** version two is all industries from 1992 to 2012;

  *** ONLY include this first step in version 2 *******************************;
  ** industries that exist from 1977 to 2012 *******;
  ** assume cn is name of main dataset;
*/

/*Writing macro in order to call for both versions of sales missing growth calculations */ 

%macro sales_mgrow(dt_out = );

proc sort data=cn; by year msa ind; run;

DATA cn_future;
    SET cn;
    year = year - 5;
RUN;

/* Identify common industries every 5 years */

proc means data=cn noprint;
  VAR sales;
  by year msa ind;
output out=ind_current(drop=_type_ _freq_) mean=cn;
run;

DATA ind_future;
  set ind_current(keep=year msa ind);
  year = year - 5;
run;

DATA ind (keep = year msa ind);
  merge ind_current(in=current) ind_future(in=future);
  by year msa ind;
  IF current = 1 AND future = 1;
run;

proc sort data=ind; by year msa ind; Run;

PROC sort data=cn_future; By year msa ind; RUN;

DATA inc_future (drop=incumbent);
  MERGE cn_future ind (in=incumbent);
  by year msa ind;
  IF incumbent = 1;
RUN;

DATA inc_current (drop=incumbent);
  MERGE cn ind(in=incumbent);
  by year msa ind;
  IF incumbent = 1;
run;


/*New Industries */
DATA new;
  MERGE cn_future ind(in=incumbent);
  BY year msa ind;
  IF incumbent ~= 1;
RUN;

PROC means data=new noprint;
    VAR sales;
    BY year msa;
OUTPUT out=new(drop=_type_ _freq_) sum=salesnew;
RUN;

PROC means data=cn_future noprint;
  VAR sales;
  BY year msa;
OUTPUT out=all(drop=_type_ _freq_) sum=salesall;
RUN;

proc sort data=new; by year msa; run; 
proc sort data=all; by year msa; run; 

DATA new (keep=year msa newshare);
    Merge new all;
    BY year msa;
    Newshare = salesnew/salesall;
RUN;

/* SV weights of incumbent industries */
PROC means data=inc_current noprint;
  by year msa ind;
  VAR sales;
  output out=sv_current(drop=_type_ _freq_) sum=sales_current;
run;

PROC means data=inc_future noprint;
  by year msa ind;
  VAR sales;
  output out=sv_future(drop=_type_ _freq_) sum=sales_future;
run;

DATA sv (keep=year msa ind sv);
  merge sv_current(in=current) sv_future(in=future); 
  by year msa ind;
  IF current = 1 AND future = 1;
  dif = sales_future - sales_current;
  logdif = log(sales_future/sales_current);
  sv = dif/logdif;
  if sales_future = sales_current then sv = sales_current;
run;

PROC means data=sv noprint;
  by year msa;
  VAR sv;
output out=svsum(drop=_type_ _freq_) sum=svsum;
run;

proc sort data=sv; by year msa; run;
proc sort data=svsum; by year msa; run;

DATA SV (keep=year msa ind sv);
  merge sv svsum;
  by year msa;
  sv = sv/svsum;
run;

/* end SV weights */;

/* Shares of  incumbent establishments  */
Proc sort data=inc_current;  by year msa lbdid; Run;

Proc sort data=inc_future; by year msa lbdid; run;

DATA cn_est (keep = year msa ind sales_current sales_future);
  merge inc_current(in=current rename=(sales=sales_current) ) 
      inc_future(in=future rename=(sales=sales_future));
  by year msa lbdid;
  IF current = 1 AND future = 1;
Run;

proc sort data=cn_est; by year msa ind; run;

PROC means data=cn_est noprint;
  VAR sales_current sales_future;
  BY year msa ind;
OUTPUT out=cn_est(drop=_type_ _freq_) SUM=sales_current sales_future;
RUN;

Proc sort data=inc_current; by year msa ind; run;
Proc sort data=inc_future; by year msa ind; run;

PROC means data=inc_current noprint;
  VAR sales;
  By year msa ind;
OUTPUT out=inc_sumcurrent(drop=_type_ _freq_) sum=salesind_current;
Run;

PROC means data=inc_future noprint;
  VAR sales;
  By year msa ind;
OUTPUT out=inc_sumfuture(drop=_type_ _freq_) sum=salesind_future;
Run;

proc sort data=sv; by year msa ind; run;
proc sort data=cn_est; by year msa ind; run;
proc sort data=inc_sumcurrent; by year msa ind; run;
proc sort data=inc_sumfuture; by year msa ind; run;

DATA cn_est (keep=year msa ind dif);
   Merge cn_est inc_sumcurrent inc_sumfuture sv;
   BY year msa ind;
   Sales_current = sales_current/salesind_current;
   Sales_future = sales_future/salesind_future;
   Dif = sv*log(sales_current/sales_future);
Run;

PROC means data=cn_est noprint;
   VAR dif;
   BY year msa;
OUTPUT out=incumbent(drop=_type_ _freq_) sum=inc_missing;
RUN;

DATA missinggrowth;
   MERGE incumbent new;
   BY year msa;
RUN;

/*
PROC means data=missinggrowth noprint;
   VAR inc_missing newshare;
   BY MSA;
OUTPUT out=missinggrowth(drop=_type_ _freq_) mean=inc_missing newshare;
*/

/* Introduce Sales share by MSA weights to missinggrowth */
proc sort data=cn; by year; run;

proc means data=cn noprint;
  VAR sales; 
  by year; 
  output out=tot_sales(drop=_type_ _freq_) sum=tot_sales;
run;

proc sort data=cn; by year msa; run;

proc means data=cn noprint;
  VAR sales;
  by year msa; 
  output out=msa_sales(drop = _type_ _freq_) sum=msa_sales;
run;

proc sort data=tot_sales; by year; run;
proc sort data=msa_sales; by year; run;

data msa_sales_current;
  merge msa_sales tot_sales;
  by year;
  sales_sh = msa_sales / tot_sales;
run;

data msa_sales_future;
  set msa_sales_current;
  year = year - 5;
  rename sales_sh = sales_sh_future;
run;

proc sort data=msa_sales_current; by year msa; run;
proc sort data=msa_sales_future;by year msa; run;

data msa_sales_weight;
  merge msa_sales_current msa_sales_future; 
  by year msa;
  sales_sh_final = (sales_sh + sales_sh_future) / 2;
run;

proc sort data=msa_sales_weight; by year msa; run;
proc sort data=missinggrowth; by year msa; run;
proc sort data=dt_msa83_emp77; by msa; run;

data &dt_out.; 
  merge missinggrowth msa_sales_weight;
  by year msa;
run;

proc sort data=&dt_out.; by msa; run;

data &dt_out.;
  merge &dt_out. dt_msa83_emp77;
  by msa;
run;

%mend;


/* Call macro for 2 versions: 
Version 1: Industries with sales data in 1977 and 2012
Version 2: Industries with sales data in 1992 (and before) and 2012
*/

proc sort data=lbd; by ind; run;
proc sort data=cw_sales; by ind; run;

data cn;
  merge lbd cw_sales(in=in_cw); 
  by ind; 
  if in_cw = 1;
  if year_min = 1977;
run;

%sales_mgrow(dt_out = ver1);

data cn;
  merge lbd cw_sales(in=in_cw); 
  by ind; 
  if in_cw = 1;
  if year_min <= 1992;
  if year in (1992, 1997, 2002, 2007, 2012);
run;

%sales_mgrow(dt_out = ver2);

/* Export */
proc export data=ver1 outfile = "&dir_out./sales_mgrow_msa_v1_1977.dta" replace; 
run; 

proc export data=ver2 outfile = "&dir_out./sales_mgrow_msa_v2_1992.dta" replace; 
run; 


/* End of SAS file */

  
